Exploring CSW access in Python using OWSLib with CIDA Geonetwork


In [1]:
from IPython.core.display import HTML
# Embed the CIDA GeoNetwork home page in the notebook as a 900x280 iframe.
HTML('<iframe src=http://cida.usgs.gov/geonetwork/srv/en/main.home width=900 height=280></iframe>')


Out[1]:

In [1]:
from owslib.csw import CatalogueServiceWeb

In [2]:
# Connect to a CSW endpoint and explore its properties.
# Alternative endpoints are kept commented out for reference; only the
# uncommented assignment below is used.
#endpoint = 'http://www.ngdc.noaa.gov/geoportal/csw' # NGDC Geoportal
#endpoint = 'http://data.nodc.noaa.gov/geoportal/csw'  # NODC Geoportal: collection level

#endpoint = 'http://geodiscover.cgdi.ca/wes/serviceManagerCSW/csw'  # NRCAN CUSTOM
#endpoint = 'http://geoport.whoi.edu/gi-cat/services/cswiso' # USGS Woods Hole GI_CAT
endpoint = 'http://cida.usgs.gov/gdp/geonetwork/srv/en/csw' # USGS CIDA Geonetwork
#endpoint = 'http://www.nodc.noaa.gov/geoportal/csw'   # NODC Geoportal: granule level

# timeout=30 is handed to the client constructor (presumably seconds -- confirm in the OWSLib docs)
csw = CatalogueServiceWeb(endpoint,timeout=30)
# last expression of the cell: shows the client's `version` attribute
csw.version


Out[2]:
'2.0.2'

In [3]:
# List the `name` of every entry in the client's `operations` collection.
[op.name for op in csw.operations]


Out[3]:
['GetCapabilities',
 'DescribeRecord',
 'GetDomain',
 'GetRecords',
 'GetRecordById',
 'Transaction']

In [4]:
# Larger alternative bounding box, kept for reference:
#bbox=[-141,42,-52,84]
# Bounding box used for the search below.
# NOTE(review): presumably [min lon, min lat, max lon, max lat] -- confirm the
# axis order expected by OWSLib / the server.
bbox=[-71.5, 39.5, -63.0, 46]
# Keyword search restricted to the bounding box, called with maxrecords=10.
csw.getrecords(keywords=['sea_water_temperature'],bbox=bbox,maxrecords=10)
# Last expression of the cell: shows the search summary stored on the client
# (the recorded run reported 0 matches).
csw.results


Out[4]:
{'matches': 0, 'nextrecord': 0, 'returned': 0}

In [5]:
# Query the catalog without any keyword or bbox filter, called with maxrecords=5.
# NOTE(review): no `esn` (element set name) argument is passed; the XML printed
# later in this notebook is a csw:SummaryRecord, which may be why `uris` and
# `references` come back empty -- try esn='full' and confirm against the OWSLib docs.
csw.getrecords(maxrecords=5)

In [6]:
# Print the title of every record held in csw.records.
# .items() and print() work on both Python 2 and Python 3
# (dict.iteritems() and the print statement are Python 2 only).
for rec, item in csw.records.items():
    print(item.title)


Great Lakes Coastal Forecasting System Nowcast/Lake Superior Nowcast History 2D
Great Lakes Coastal Forecasting System Nowcast/Lake Erie Nowcast History 2D
Great Lakes Coastal Forecasting System Nowcast/Lake Ontario Nowcast History 2D
Great Lakes Coastal Forecasting System Nowcast/Lake Huron Nowcast History 2D
Great Lakes Coastal Forecasting System Nowcast/Lake Michigan Nowcast History 2D

In [7]:
# Show the record identifiers; list() makes Python 3 print the ids themselves
# instead of a keys-view repr (output on Python 2 is unchanged).
print(list(csw.records.keys()))


['e3b464e6-a32e-4ccb-b6bb-7ebcddf19b61', '9ea89a4f-9eb0-4d6e-be2a-dbc9154c73e7', '07379b46-6fb3-4889-91e3-fb5a76d76a98', '2c511d5a-5dd0-452d-a918-fb3a99c1c8b7', '05bf15c9-fd87-4e19-87f6-68a3c682805f']

In [8]:
# choose a sample record
# Choose a sample record by identifier (the first id in the key list printed above).
# NOTE(review): the id is hard-coded; the lookup will presumably fail if the
# catalog no longer returns this record -- confirm before re-running.
a=csw.records['e3b464e6-a32e-4ccb-b6bb-7ebcddf19b61']

In [9]:
# Show the sample record's title (print() form runs on Python 2 and 3).
print(a.title)


Great Lakes Coastal Forecasting System Nowcast/Lake Superior Nowcast History 2D

In [10]:
# unfortunately the "uris" property is empty
# (print() form runs on Python 2 and 3)
print(a.uris)


[]

In [11]:
# yet I can see the URIs here:
# (print() form runs on Python 2 and 3)
# NOTE(review): on Python 3 `a.xml` may be bytes and print as b'...' --
# decode it first if so; confirm against the OWSLib version in use.
print(a.xml)


<csw:SummaryRecord xmlns:geonet="http://www.fao.org/geonetwork" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dct="http://purl.org/dc/terms/" xmlns:csw="http://www.opengis.net/cat/csw/2.0.2" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
      <dc:identifier>e3b464e6-a32e-4ccb-b6bb-7ebcddf19b61</dc:identifier>
      <dc:title>Great Lakes Coastal Forecasting System Nowcast/Lake Superior Nowcast History 2D</dc:title>
      <dc:subject>inlandWaters</dc:subject>
      <dct:abstract>Great Lakes Coastal Forecasting System Nowcast/Lake Superior Nowcast History 2D</dct:abstract>
    </csw:SummaryRecord>
    

In [12]:
# Let's look at the references parsed for the sample record
# (last expression of the cell, so its value is displayed).
a.references


Out[12]:
[]

In [13]:
# get specific ServiceType URL from records
def service_urls(records,service_string='urn:x-esri:specification:ServiceType:OPeNDAP'):
    """Collect, per record, the URL of the first reference matching a scheme.

    Parameters
    ----------
    records : mapping
        Mapping of record identifier -> record object (e.g. ``csw.records``).
        Each record is expected to expose a ``references`` attribute holding
        an iterable of dicts with ``'scheme'`` and ``'url'`` keys.
    service_string : str
        Scheme value to look for in each reference's ``'scheme'`` entry.

    Returns
    -------
    list
        One URL per record that has a matching reference, in the mapping's
        iteration order. Records without a match (or whose first matching
        reference has no URL) contribute nothing.
    """
    urls = []
    # .values() works on both Python 2 and Python 3 mappings
    # (dict.iteritems() exists only on Python 2).
    for rec in records.values():
        # Tolerate records whose `references` attribute is missing or None.
        refs = getattr(rec, 'references', None) or ()
        # Generator + next(): stop at the first reference whose scheme matches;
        # fall back to None when no reference matches. .get() avoids a KeyError
        # on reference dicts that lack a 'scheme' or 'url' entry.
        url = next((d.get('url') for d in refs if d.get('scheme') == service_string), None)
        if url is not None:
            urls.append(url)
    return urls

In [14]:
# Collect the URLs registered under the ArcIMS "Metadata:Onlink" scheme.
# NOTE: despite the variable name, the scheme queried here is not the OPeNDAP one.
dap_urls = service_urls(csw.records,service_string='urn:x-esri:specification:ServiceType:ArcIMS:Metadata:Onlink')
# print() form runs on both Python 2 and Python 3
print(dap_urls)


[]

In [15]: